{
	"model_name_or_path": "facebook/llama-7b",
	"tokenizer_name_or_path": "facebook/llama-7b",
	"input_dir": "./data",
	"output_dir": "./checkpoints/llama_pretrain_ckpts",
	"per_device_train_batch_size": 1,
	"gradient_accumulation_steps": 8,
	"per_device_eval_batch_size": 2,
	"tensor_parallel_degree": 8,
	"pipeline_parallel_degree": 1,
	"sharding": "stage3",
	"virtual_pp_degree": 1,
	"sequence_parallel": 0,
	"use_flash_attention": true,
	"use_fused_rms_norm": true,
	"use_fused_rope": true,
	"max_seq_length": 4096,
	"learning_rate": 3e-05,
	"min_learning_rate": 3e-06,
	"warmup_steps": 30,
	"logging_steps": 1,
	"max_steps": 100,
	"save_steps": 5000,
	"eval_steps": 1000,
	"weight_decay": 0.01,
	"bf16": true,
	"fp16_opt_level": "O2",
	"warmup_ratio": 0.01,
	"max_grad_norm": 1.0,
	"dataloader_num_workers": 1,
	"continue_training": 0,
	"do_train": true,
	"do_eval": false,
	"do_predict": false,
	"disable_tqdm": true,
	"recompute": true,
	"distributed_dataloader": 1,
	"recompute_granularity": "full",
	"save_total_limit": 2,
	"sharding_parallel_degree": 1,
	"autotuner_benchmark": 1
}
